#coding:utf8

'''
#全国企业信用信息公示系统（陕西）
#维护肖迪
'''
from scpy.logger import get_logger
logger = get_logger(__file__)
import pycurl 
import urllib
import re
from utils import kill_captcha
import StringIO
import random
from bs4 import BeautifulSoup
import json
import table
import time
import requests
def td_clean(html):
    """Collapse attribute-bearing ``<td ...>`` tags to bare ``<td>`` and
    strip all whitespace characters from *html*."""
    # Shield pre-existing plain <td> tags before collapsing the rest.
    marker = '@@@@@'
    html = html.replace('<td>', marker)
    html = re.sub('<td.+?>', '<td>', html)
    html = html.replace(marker, '<td>')
    for ch in (' ', '\r', '\t', '\n'):
        html = html.replace(ch, '')
    return html
def reclean(text, html):
    """Return the first match of regex pattern *text* in *html*, with every
    '>' character removed; return '' when the pattern does not match.

    The original caught every exception; the only expected failure is the
    IndexError from indexing an empty findall() result, so catch just that.
    """
    try:
        return re.findall(text, html)[0].replace('>', '')
    except IndexError:
        return ''
def html_clean(text):
    """Strip all HTML tags and whitespace characters from *text*.

    *text* is coerced with str() first, so non-string input is accepted.
    """
    stripped = re.sub('<[\s\S]+?>', '', str(text))
    for ws in ('\n', ' ', '\t', '\r'):
        stripped = stripped.replace(ws, '')
    return stripped

def time_time1(time):  # date-string normaliser
    """Normalise a CJK-style date such as u'2014年5月8日' to
    '2014-05-08 00:00:00'.

    Runs of digits are extracted and zero-padded to two characters; returns
    '' when the input is empty after whitespace removal.  (A non-empty input
    with no digits yields ' 00:00:00', matching the historical behaviour.)
    """
    for ws in ('\t', '\n', '\r', ' '):
        time = time.replace(ws, '')
    if not time:
        return ''
    padded = [part.zfill(2) for part in re.findall("\d+", time)]
    return '-'.join(padded) + " 00:00:00"
def get_value(regular, text):
    """Return the first match of pattern *regular* in *text* after removing
    all ``<span ...>`` opening tags; return '' when nothing matches.

    The original used a bare except; the expected failure is only the
    IndexError from an empty findall() result.
    """
    try:
        text = re.sub('<span.+?>', '', text)
        return re.findall(regular, text)[0]
    except IndexError:
        return ''
def time_cleann(time):  # date-string normaliser (same contract as time_time1)
    """Normalise a CJK-style date string to 'YYYY-MM-DD 00:00:00'.

    Single-digit runs get a leading zero; returns '' for an input that is
    empty once whitespace is removed.
    """
    cleaned = re.sub('[\t\n\r ]', '', time)
    if not cleaned:
        return ''
    parts = re.findall("\d+", cleaned)
    return '-'.join("0" + p if len(p) == 1 else p for p in parts) + " 00:00:00"

def curl(url, data='', cookie='', debug=False):  # fetch helper: GET, or POST when data is given
    """Fetch *url* via pycurl and return the raw response body (bytes).

    data   -- dict of POST fields; when falsy the request is a plain GET.
    cookie -- any truthy value additionally enables WRITING cookies to
              "cookie_file_name1"; the cookie file is always READ.
    debug  -- unused.

    NOTE(review): the UserAgent variable below is never used; the effective
    User-Agent is the Googlebot string set in HTTPHEADER.  The Host header
    is hard-coded to 117.22.252.219:8002 — presumably the site's real IP;
    confirm before reuse.
    """
    UserAgent = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
    s = StringIO.StringIO()
    c = pycurl.Curl()
    c.setopt(c.URL, url)
    c.setopt(c.REFERER, 'http://gsxt.scaic.gov.cn/')
    if cookie:
        c.setopt(c.COOKIEJAR, "cookie_file_name1")
    c.setopt(c.COOKIEFILE, "cookie_file_name1")
    c.setopt(pycurl.FOLLOWLOCATION, True)
    if data:
        c.setopt(c.POSTFIELDS, urllib.urlencode(data))
    c.setopt(pycurl.CONNECTTIMEOUT, 60)
    c.setopt(pycurl.TIMEOUT, 120)
    c.setopt(pycurl.ENCODING, 'gzip')
    c.setopt(c.HTTPHEADER, ['Host:117.22.252.219:8002', 'Upgrade-Insecure-Requests:1',
                            'User-Agent:Googlebot/2.1 (+http://www.googlebot.com/bot.html)',
                            'Origin:http://gsxt.scaic.gov.cn'])
    c.setopt(c.WRITEDATA, s)
    c.perform()
    c.close()
    return s.getvalue()
def tr(table):
    """Collapse attribute-bearing ``<tr ...>`` tags to bare ``<tr>`` and
    remove all whitespace characters from *table*."""
    collapsed = re.sub('<tr.*?>', '<tr>', table)
    return re.sub('[ \r\t\n]', '', collapsed)
def td_clean(html):
    """Collapse ``<td ...>`` opening tags to plain ``<td>`` and drop all
    whitespace.  (Re-definition shadowing the identical helper defined
    earlier in this file.)"""
    sentinel = '@@@@@'
    normalized = re.sub('<td.+?>', '<td>', html.replace('<td>', sentinel))
    normalized = normalized.replace(sentinel, '<td>')
    return re.sub('[ \r\t\n]', '', normalized)
def shareHolderList_func(table):
    print '!@#'*100
    print table
    table = tr(table)
    print '%'*300
    print table
    detail = re.findall('<tr><td.*?</a></td>',table)
    #print detail
    # del detail[0]
    # del detail[0]
    if detail:
        shareHolderList = []
        print detail
        for i in detail:
            i = td_clean(i)
            #print i
            detail_td = re.findall("<td>(.*?)</td>",i)
            #print detail_td
            #try:
            if len(detail_td) >= 4:
                shareholderType = detail_td[3]
                shareholderName = detail_td[0]
                shareHolderdetail = detail_td[4]
                dic1 = {"shareholderType":shareholderType,"shareholderName":shareholderName,"shareHolderdetail":shareHolderdetail}
                shareHolderList.append(dic1)
            #except:
            #    continue
        return shareHolderList
    else:
        shareHolderList = []
        return shareHolderList
def yearReport(pripid,l,type=0):
    """Fetch and parse every published annual report for one company.

    pripid -- site-internal company id.
    l      -- millisecond timestamp passed as the site's 'random' parameter.
    type   -- 0: return only the parsed report list; any other value:
              return (parsed_list, yearList) where yearList carries the raw
              HTML of each year's report page.

    NOTE(review): the final result text is built by string formatting and
    materialised with eval(); field values are not escaped, so a quote in
    the page data would break (or execute) — consider json instead.
    """
    yearList = []
    data_3 = {'method': 'qygsInfo', 'maent.pripid':pripid, 'czmk': 'czmk8', 'random': '%s' % l}
    html_yearReportList = curl('http://xygs.snaic.gov.cn/ztxy.do', data_3).decode('gbk')
    # Report years appear as onclick="doNdbg('2014')" links.
    report_year = re.findall('''onclick="doNdbg\('(.+?)'\)''', html_yearReportList)
    #print year
    print html_yearReportList
    print report_year
    # NOTE(review): findall returns a list, never '', so this guard never
    # fires; an empty list simply skips the loop below.
    if report_year == '':
        return ''
    yearReportList = ''

    for i in report_year:
        print '*'*300
        # Save the year now: the loop variable i is reused (shadowed) by the
        # inner loops further down.
        year = i
        data_2 = {"method": "ndbgDetail", "maent.pripid": "%s" % pripid, "maent.nd": i, "random": "%s" % l}
        print '%'*100
        print data_2
        html_2014 = curl('http://xygs.snaic.gov.cn/ztxy.do', data_2).decode('gbk')
        dict_temp = {'year':year,"html":html_2014}
        yearList.append(dict_temp)
        soup = BeautifulSoup(html_2014, "html.parser")
        # Whitespace-free copy used by the regex extraction below; the
        # patterns therefore contain no spaces (e.g. <thwidth="20%...).
        html = html_2014.replace('\r', '').replace('\n', '').replace(' ', '').replace('\t', '')
        # open(path + '/a.html', 'w').write(html)
        print html
        # --- base-info fields (labels are the site's Chinese captions) ---
        regNo = reclean(u'<thwidth="20%.+?>注册号</th><tdwidth="30%">(.+?)</td>', html)
        if not regNo:
            regNo = reclean(u'<thwidth="20%.+?>注册号</th><tdwidth="20%">(.+?)</td>', html)
        phone = reclean(u'<thwidth="20%.+?>企业联系电话</th><tdwidth="30%"(.+?)</td>', html)
        email = reclean(u'<thwidth="20%.+?>电子邮箱</th><tdwidth="30%"(.+?)</td>', html)
        zipcode = reclean(u'<thwidth="20%.+?>邮政编码</th><tdwidth="30%"(.+?)</td>', html)
        enterpriseStatus = reclean(u'<thwidth="20%.+?>企业经营状态</th><tdwidth="30%"(.+?)</td>', html)
        haveWebsite = reclean(u'<thwidth="20%.+?>是否有网站或网店</th><tdwidth="30%"(.+?)</td>', html)
        buyEquity = reclean(u'<thwidth="20%.+?>企业是否有投资信息或购买其他公司股权</th><tdwidth="30%"(.+?)</td>', html)
        equityTransfer = reclean(u'<thwidth="20%.+?>有限责任公司本年度是否发生股东股权转让</th><tdwidth="30%"(.+?)</td>', html)
        address = reclean(u'<thwidth="20%.+?>企业通信地址</th><t.+?>(.+?)</td>', html)
        employeeCount = reclean(u'<thwidth="20%.+?>从业人数</th><tdwidth="30%">(.+?)</td>', html)
        baseInfo = '{"regNo":"%s","phone":"%s","email":"%s","zipcode":"%s","enterpriseStatus":"%s","haveWebsite":"%s","buyEquity":"%s","equityTransfer":"%s","address":"%s","employeeCount":"%s"}' % (
        regNo, phone, email, zipcode, enterpriseStatus, haveWebsite, buyEquity, equityTransfer, address, employeeCount)
        # --- website / webshop section ---
        if u'网站或网店信息' in html:
            web = re.findall('<trid="tr_wzxx_1"name="wzxx">(.+?)</tr>', html)
            if bool(web) == False:
                website = '{"type":"","name":"","link":""}'
            else:
                # NOTE: only the last row survives; i is shadowed here.
                for i in range(len(web)):
                    web1 = re.findall('<td>(.+?)</td>', web[i])
                    website = '{"type":"%s","name":"%s","link":"%s"}' % (web1[0], web1[1], web1[2])
        else:
            website = '{"type":"","name":"","link":""}'
        #print website
        # --- investor (shareholder contribution) rows; note this matches
        # against the raw html_2014, not the whitespace-free copy ---
        investnum = re.findall('<tr id="tr_tzrxx_\d+" name="tzrxx">([\s\S]+?)</tr>', html_2014)
        num = len(investnum)
        str2 = ''
        for i in range(num):
            investnum1 = investnum[i].replace('\t', '').replace('\n', '').replace('\r', '').replace(' ', '')
            #print investnum
            #shareholderName1=re.findall('<tdstyle="text-align:left;">(.+?)</td>',investnum[i])
            shareholderName = get_value('<divstyle="width:100px;">(.+?)</div>', investnum1)
            subConam = get_value(u'<listyle="text-align:left;">(.+?)&nbsp;</li>', investnum1)
            try:
                subConDate = time_time1(re.findall(u'<listyle="text-align:center;">(\d+年\d+月\d+日)</li>', investnum1)[0])
            except:
                subConDate = ''
            try:
                subConType = re.findall(u'<lititle=".+?"style="text-align:left;">(.+?)</li>', investnum1)[0]
            except:
                subConType = ''
            try:
                paidType = re.findall(u'<lititle=".+?"style="text-align:left;">(.+?)</li>', investnum1)[1]
            except:
                paidType = ''

            # NOTE(review): same pattern as subConam above, so paidConMoney
            # always equals subConam — looks like it should take index [1].
            paidConMoney = get_value(u'<listyle="text-align:left;">(.+?)&nbsp;</li>', investnum1)
            try:
                paidTime = time_cleann(re.findall(u'<listyle="text-align:center;">(\d+年\d+月\d+日)</li>', investnum1)[1])
            except:
                paidTime = ''

            #ID=re.findall('''<ahref="###"onclick="showRyxx\('(.+?)','(.+?)'\)"''',investnum[i])[0]
            #data_data={'method':'tzrCzxxDetial','maent.xh':'%s'%ID[0],'maent.pripid':'%s'%ID[1],"random":"%s"%l}
            #html_html=curl('http://gsxt.scaic.gov.cn/ztxy.do',data_data)
            #open(path+'/html_html.html','w').write(html_html)
            #regCapCur=get_value('<li style="text-align: left;vertical-align: middle;">&nbsp;(.+?)</li>',html_html).decode('gbk')
            #print regCapCur
            #subConam=get_value('<li style="text-align: right;vertical-align: middle;">(.+?)&nbsp;</li>',html_html)
            #print subConam
            #conDate=get_value('<li>(\d.+?)</li>',html_html)

            detail = '{"shareholderName":"%s","subConam":"%s","subConDate":"%s","subConType":"%s","paidConMoney":"%s","paidTime":"%s","paidType":"%s"}' % (
            shareholderName, subConam, subConDate, subConType, paidConMoney, paidTime, paidType)
            str2 = detail + ',' + str2
        investorInformations = '[%s]' % str2.rstrip('\n').rstrip(',')
        #print soup
        # --- assets table (4th table with class detailsList) ---
        try :
            assetsInfotest = soup.findAll('table', {"class": "detailsList"})[3]
            assetsInfo1 = assetsInfotest.findAll('td')
            #print assetsInfo1
            generalAssets = assetsInfo1[0].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
            #print generalAssets
            ownersEequity = assetsInfo1[1].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
            #print ownersEequity
            revenue = assetsInfo1[2].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
            #print revenue
            profit = assetsInfo1[3].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
            mainRevenue = assetsInfo1[4].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
            netProfit = assetsInfo1[5].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
            taxPayment = assetsInfo1[6].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
            liability = assetsInfo1[7].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
            # NOTE(review): BUG — the last %s receives taxPayment again, so
            # "liability" in the output carries the tax figure and the
            # liability variable above is never used.
            assets_Info = '{"generalAssets":"%s", "ownersEequity":"%s", "revenue":"%s","profit":"%s","mainRevenue":"%s","netProfit":"%s","taxPayment":"%s","liability":"%s"}'%(generalAssets,ownersEequity,revenue,profit,mainRevenue,netProfit,taxPayment,taxPayment)
            #assets_Info = json.dumps(assets_Info,ensure_ascii=False).replace('\r', '')
            assets_Info = str(assets_Info).replace('\r', '')
        except IndexError,e:
            assets_Info = "{}"


        #print assets_Info
        #time.sleep(60)
        # --- change records (name="bgxx" rows) ---
        strr2 = ''
        for i in soup.findAll('tr', {"name": "bgxx"}):
            jj = i.findAll('td')
            if u'收起更多' in html_clean(jj[3]):
                afterChange1 = html_clean(jj[3]).split(u'更多')
                afterChange = afterChange1[1]
            else:
                afterChange = html_clean(jj[3])
            strr1 = {"changedItem": html_clean(jj[1]), "beforeChange": html_clean(jj[2]), "afterChange": afterChange,
                     "time": html_clean(jj[4])}
            strr1 = str(strr1)
            #print strr1
            strr2 = strr1 + ',' + strr2
        changeRecords = '[%s]' % strr2.rstrip(',')
        #html
        # --- equity change rows (name="gqbg") ---
        gqbg = re.findall('<trname="gqbg"id="tr_gqbg_\d">(.+?)</tr>',html)
        str_str1=''
        for i in gqbg:
            shareholderName = re.findall('<tdstyle="text-align:left;">(.*?)</td>',i)[0]
            equityBefore = re.findall('<tdstyle="text-align:left;">(.*?)</td>',i)[1]
            equityAfter = re.findall('<tdstyle="text-align:left;">(.*?)</td>',i)[2]
            time = time_time1(re.findall('<tdstyle="text-align:center;">(.*?)</td>',i)[0])
            str2='{"shareholderName":"%s","equityAfter":"%s","equityBefore":"%s","time":"%s"}'%(shareholderName,equityAfter,equityBefore,time)
            str_str1 = str2+','+str_str1
        try:
            equityChangeInformations = "[%s]"%str_str1.rstrip(',')
        except:
            equityChangeInformations = "[]"
        # --- outbound investment section ---
        try:
            entinv = re.findall(u'对外投资信息</th></tr>([\s\S]+?)</table>',html)[0]
            enlist = re.findall('<trname="tzxx"id="tr_tzxx_\d">[\s\S]+?</tr>',entinv)
            enlist_str=''
            for i in enlist:
                print i
                result = re.findall('<tdstyle="text-align:left;">(.+?)</td>',i)
                entName = result[0]
                regNo = result[1]
                str1 = '{"entName":"%s","entType":"","fundedRatio":"","currency":"","entStatus":"","canDate":"","esDate":"","regOrg":"","regCapcur":"","regCap":"","revDate":"","name":"","subConam":"","regNo":"%s"}'%(entName,regNo)
                enlist_str = str1+','+enlist_str
            entinvItemList = '[%s]'%enlist_str.rstrip(',')
        except IndexError:
            entinvItemList = '[]'
        print entinvItemList
        # Assemble this year's record and prepend to the accumulator string.
        yearReportListstr1 = '{"year":"%s","entinvItemList":%s,"baseInfo":%s,"website":%s,"investorInformations":%s,"assetsInfo":%s,"equityChangeInformations":%s,"changeRecords":%s}' % (
        year,entinvItemList,baseInfo, website, investorInformations, assets_Info, equityChangeInformations, changeRecords)
        yearReportList = yearReportListstr1 + ',' + yearReportList

    over = '[%s]' % yearReportList.rstrip(',')
    if type == 0:
        return eval(over)
    else:
        return (eval(over),yearList)
def verify(key,**args):
    """Solve the site captcha and search for company *key* (unicode name).

    Retries up to 20 times on captcha failure.  Returns one of:
      - (detail_html, invdata)  when a detail page was fetched,
      - a (text, text) tuple of company name / status when the detail link
        is absent (e.g. revoked companies),
      - ''  when the site reports no results.
    Raises ValueError after 20 failed attempts.

    NOTE(review): `global detail_url` is declared but never assigned here.
    NOTE(review): `name` in the 20-attempt log line is unbound if the first
    exception fires before name = key.encode('gbk') — would raise NameError.
    """
    global detail_url
    loop_num =0
    s = requests.Session()
    while 1:
        loop_num +=1
        try:
            # Millisecond timestamp doubles as the site's cache-buster.
            l = int(time.time() * 1000)
            #code = curl('http://xygs.snaic.gov.cn/ztxy.do?method=index&random=%s'%l,cookie=1)
            #code = re.findall('code:"(.+?)"',code.replace(' ',''))[0]
            #print code
            
            verify_url = "http://xygs.snaic.gov.cn/ztxy.do?method=createYzm&dt=%s&random=%s"%(l,l)
            verify_image = s.get(verify_url).content
            #print verify_image
            # External captcha-recognition service/helper.
            verify = kill_captcha(verify_image,'sax','jpeg')
            # print verify
            # open('/Users/xiaodi/Desktop/temp.jpeg','w').write(verify_image)
            # verify = raw_input('yzm:')
            # data = {'currentPageNo':'1','yzm':'%s'%verify,'cxym':'cxlist','maent.entname':name}
            print type(key)
            print key
            # The site expects GBK-encoded form values.
            name = key.encode('gbk')
            data = {'currentPageNo':'1','yzm':'%s'%verify,'pName':'%C7%EB%CA%E4%C8%EB%D3%AA%D2%B5%D6%B4%D5%D5%D7%A2%B2%E1%BA%C5','maent.entname':name,'BA_ZCH':'%C7%EB%CA%E4%C8%EB%D3%AA%D2%B5%D6%B4%D5%D5%D7%A2%B2%E1%BA%C5'}
            print data
            search_url = 'http://xygs.snaic.gov.cn/ztxy.do?method=list&djjg=&random=%s'%l
            html = s.post(search_url,data).content
            # open('/Users/xiaodi/Desktop/temp.html','w').write(html)
            # A non-empty "var flag" means the captcha was rejected.
            varity = re.findall("var flag = '(.*?)'",html)
            print varity
            if varity[0]:
                logger.info('验证码错误')
                raise EOFError
            if u'您搜索的条件无查询结果' in html.decode('gbk'):
                logger.info('验证码错误')
                return ''
            #url_arg = re.findall('<a href="javascript:void\(0\);" onclick="openView\((.*?)\)"', html)[0]
            try:
                # openView('pripid','entbigtype') carries the detail-page args.
                url_arg = re.findall('<a href="javascript:void\(0\);" onclick="openView\((.*?)\)"', html)[0]
                sdata = url_arg.split(',')
                invdata = {'method':'qyInfo','djjg':'','maent.pripid':'%s'%sdata[0].replace("'", ""),'maent.entbigtype':'%s'%sdata[1].replace("'", ""), 'random':'%s'%l}
                detail_html = s.post('http://xygs.snaic.gov.cn/ztxy.do',invdata).content
                return (detail_html,invdata)
            except:
                # No clickable detail link: fall back to the name/status pair.
                print html
                companystatus = re.findall('<a href="javascript:void\(0\);" >(.+?)</a>(.+?)</li>',html.decode('gbk'))[0]
                return companystatus
        except Exception,e:

            logger.exception(e)
            if loop_num >= 20:
                logger.info('验证码尝试了20次，退出尝试')
                logger.error('保存word在exception日志:%s'%name)
                raise ValueError
                break
            logger.info('验证码错误，正在识别,错误次数%s'%loop_num)
            continue


def run(html1,**args):
    """Parse a company detail page (*html1*, GBK bytes) into the aggregate
    result dict expected downstream.

    Keyword args:
      type=1       -- also collect raw-HTML provenance; return the triple
                      (html_source, alldata, companyUrl) instead of alldata.
      searchkey    -- original search keyword (stored in html_source).
      companyData  -- POST data used to fetch the page (stored in companyUrl).

    Fetches three more sections (key people czmk2, abnormal operation czmk6,
    spot checks czmk7) and delegates table parsing to the sibling `table`
    module and to yearReport().
    """

    pripid = re.findall('maent.pripid=(.+?)&',html1)[0]
    time_time = int(time.time()*1000)
    data1_person = {"maent.pripid":pripid,"czmk":"czmk2","method":"baInfo","random":time_time}
    data2_abnormal = {"maent.pripid":pripid,"czmk":"czmk6","method":"jyycInfo","random":time_time}
    data3_check = {"maent.pripid":pripid,"czmk":"czmk7","method":"ccjcInfo","random":time_time}
    html2 = requests.post('http://xygs.snaic.gov.cn/ztxy.do',data=data1_person).content.decode('gbk').encode('utf8','ignore')
    html3 = requests.post('http://xygs.snaic.gov.cn/ztxy.do',data=data2_abnormal).content
    html4 = requests.post('http://xygs.snaic.gov.cn/ztxy.do',data=data3_check).content
    # open('/Users/xiaodi/Desktop/temp1.html','w').write(html2)
    detail_html = (html1 +html3 +html4).decode('gbk').encode('utf8','ignore')
    all_html = detail_html+html2
    tables = re.findall('<table[\s\S]+?</table>',detail_html)
    alterList = []
    # Change-record rows come from the original detail page only.
    detail10 = re.findall('<tr width="95%" id="tr_bg_\d+" name="bg">([\s\S]+?)</tr>', html1.decode('gbk'))
    #print '#'*100
    #print detail10
    for i in range(len(detail10)):
        print '@'*100
        detail101 = detail10[i].replace('\r', '').replace('\n', '').replace('\t', '')
        #print detail101
        altltem = re.findall('<td width="15%">(.+?)</td>', detail101)[0]
        altDate = time_cleann(re.findall('<td width="10%" style="text-align:center;">(.+?)</td>', detail101)[0])
        result = re.findall('<td width="25%">[\s\S]+?</td>',detail101)
        # Long before/after values are hidden behind a "more" toggle span.
        if '<span style="width: 100%;">' in result[0]:
            altBe = re.findall('<span style="width: 100%;">([\s\S]+?)</span>',result[0])[0]
        else:
            altBe = re.findall('<span id="beforeMore\d+_\d+" style="display:none;width:100%;">(.+?)<br/>', result[0])[0]
        if '<span style="width: 100%;">' in result[1]:
            altAf = re.findall('<span style="width: 100%;">([\s\S]+?)</span>',result[1])[0]
        else:
            altAf = re.findall('<span id="beforeMore\d+_\d+" style="display:none;width:100%;">(.+?)<br/>', result[1])[0]
        str10_1 = {"altDate":altDate,"altltem":altltem,"altBe":altBe,"altAf":altAf}
        print '*'*100
        print str10_1
        alterList.append(str10_1)

    # Branch offices / key-people tables live in the czmk2 response.
    temp_table = re.findall('<table[\s\S]+?</table>',html2)
    for temp in temp_table:
        print temp
        if '分支机构信息' in temp:
            filiationList = table.index('分支机构信息',temp)
            continue
        if '主要人员信息' in temp:
            personList = table.index('主要人员信息',temp)

    for j in tables:
        word = re.findall('<th colspan="\d" style="text-align:center;">(.+?)<',j.replace('\n','').replace('\r',''))
        try:
            print word[0]
        except:
            print word
            print j
        if '股东信息' in j:
            shareHolderList = shareHolderList_func(j)
            if shareHolderList:
                print shareHolderList
                # Enrich each shareholder with its contribution-detail page.
                for i in shareHolderList:
                    share_url = re.findall('''onclick="showRyxx\('(.+?)','(.+?)'\)"''',i['shareHolderdetail'])[0]
                    data_temp = {"maent.pripid":share_url[1],"maent.xh":share_url[0],"method":"tzrCzxxDetial","random":time_time}

                    html_temp = requests.post('http://xygs.snaic.gov.cn/ztxy.do',data=data_temp).content.decode('gbk')
                    result = re.findall('<table[\s\S]+?</table>',html_temp)
                    result = tr(result[0])
                    result = re.findall('<tr>.*?</tr>',result)
                    # Drop the three header rows before the data row.
                    del result[0]
                    del result[0]
                    del result[0]

                    result = re.findall('<tr>[\s\S]+?</tr>',result[0])
                    result = td_clean(result[0])
                    # print '!'*200
                    result = re.findall('<td>(.*?)</td>',result)
                    i['shareHolderdetail'] = share_url
                    i['subConam'] = result[1]
                    i['conDate'] = time_cleann(result[4])
                    i['fundedRatio'] = ''
                    reg_cap_cur = re.sub('<.*?>', '', result[5])
                    i['regCapCur'] = reg_cap_cur if reg_cap_cur else ''
                    i['country'] = ''
        try:
            if '基本信息' == word[0].strip():
                basicList = table.index(word[0].replace(' ',''),j)
            #if '股东信息' == word[0]:
            #    shareHolderList = table.index('股东信息',j)
            # if '主要人员信息' in word[0].strip():
            #     personList = table.index(word[0].replace(' ',''),j)
            # if '变更信息' == word[0]:
            #     continue
            if '分支机构信息' == word[0]:
                filiationList = table.index(word[0].replace(' ',''),j)
            if '清算信息' == word[0]:
                liquidationList = table.index(word[0].replace(' ',''),j)
            if '经营异常信息' == word[0]:
                print '!!!@##$$'*100
                print j
                abnormalOperation = table.index('经营异常信息',j)
        except: 
            print word
            continue
    # Each section variable may be unassigned if its table was absent;
    # default the missing ones to [] (the prints probe for NameError).
    try:
        print basicList
    except:
        basicList = []
    try:
        print shareHolderList 
    except:
        shareHolderList = []
    try:
        print personList
    except:
        personList = []
    try:
        print alterList
    except:
        alterList = []
    try:
        print filiationList
    except:
        filiationList = []
    try:
        print liquidationList
    except:
        liquidationList = []
    try:
        print abnormalOperation
    except:
        abnormalOperation = []

    # Sections the Shaanxi site does not publish are emitted empty.
    punishBreakList = []
    punishedList = []
    alidebtList =[]
    entinvItemList = []
    frinvList =[]
    frPositionList = []
    caseInfoList = []
    sharesFrostList = []
    sharesImpawnList = []
    morDetailList = []
    morguaInfoList = []
    # html = curl(report_url)
    # report_url = re.findall('"(http://gsxt\.hnaic\.gov\.cn/notice/notice/view_annual.+?)"',html)
    if args.get('type') == 1:
        yearReportList = yearReport(pripid,time_time,type=1)
        yearList = yearReportList[1]
        yearReportList = yearReportList[0]
    else:
        yearReportList = yearReport(pripid,time_time)
    alldata = {'province': 'sax', "abnormalOperation":abnormalOperation,"basicList":basicList,"shareHolderList":shareHolderList,"personList":personList,"punishBreakList":punishBreakList,"punishedList":punishedList,"alidebtList":alidebtList,"entinvItemList":entinvItemList,"frinvList":frinvList,"frPositionList":frPositionList,"alterList":alterList,"filiationList":filiationList,"caseInfoList":caseInfoList,"sharesFrostList":sharesFrostList,"sharesImpawnList":sharesImpawnList,"morDetailList":morDetailList,"morguaInfoList":morguaInfoList,"liquidationList":liquidationList,"yearReportList":yearReportList}
    if args.get('type') == 1:
        html_source = {"province":"sax","type":0,"html":all_html,"keyword":args.get('searchkey', "none"),"companyName":basicList[0]['enterpriseName'],"yearList":yearList}
        companyUrl = {'url':"http://xygs.snaic.gov.cn/ztxy.do", "province":"sax", "companyName":basicList[0]['enterpriseName'], "method":"post","data":json.dumps(args.get('companyData'))}
        return (html_source,alldata,companyUrl)
    return alldata
def search(key):
    """Search for company *key* and return the parsed result dict, or {}
    when the lookup yields nothing.

    verify() returns '' when the site reports no results; the original code
    indexed that empty string ([0]) and raised IndexError — guard it first.
    """
    result = verify(key)
    if not result:
        return {}
    html = result[0]
    if html:
        return run(html)
    return {}
def search2(key):
    """Search for company *key* with provenance: returns run(...)'s triple
    for a normal company, a stub (html_source, alldata) pair for a revoked
    one, or () when the detail HTML is empty.

    NOTE(review): verify() may return '' (no results); html[1] would then
    raise IndexError — only search() guards that case.
    """
    html = verify(key)
    print '*'*100
    print html
    print '*'*100
    # verify() returns a 2-tuple: (detail_html, post_data) for a normal hit,
    # or (company_name, status_text) when only a status line was found.
    data = html[1]
    html = html[0]
    if html:
        # Revoked company: data is the status text; build stub records with
        # only the name and status filled in.
        if u'已吊销' in data:
            html_source = {"province":"sax","type":0,"html":'',"keyword":key,"companyName":html,"yearList":''}
            basic  = [{"regNo":'',"enterpriseName":html,"frName":'',"regCap":'',"regCapCur":'',"esDate":'',"openFrom":'',"openTo":'',"enterpriseType":'',"enterpriseStatus":data,"cancelDate":'',"revokeDate":'',"address":'',"abuItem":'',"cbuItem":'',"operateScope":'',"operateScopeAndForm":'',"regOrg":'',"ancheYear":'',"ancheDate":'',"industryPhyCode":'',"industryPhyName":'',"industryCode":'',"industryName":'',"recCap":'',"oriRegNo":''}]
            alldata = {"abnormalOperation":'',"basicList":basic,"shareHolderList":'',"personList":'',"punishBreakList":'',"punishedList":'',"alidebtList":'',"entinvItemList":'',"frinvList":'',"frPositionList":'',"alterList":'',"filiationList":'',"caseInfoList":'',"sharesFrostList":'',"sharesImpawnList":'',"morDetailList":'',"morguaInfoList":'',"liquidationList":'',"yearReportList":''}
            return (html_source,alldata)
        result = run(html, type=1, searchkey=key, companyData=data)
        return result
    else:
        return ()
def search3(data):
    """Re-fetch a previously saved company detail request and parse it.

    data -- dict with 'url' (endpoint) and 'data' (POST fields), as stored
            by the companyUrl record built in run().
    """
    response = requests.post(data.get('url'), data=data.get('data'))
    return run(response.content)
if __name__ == '__main__':
    # Manual smoke test: look up a single company and pretty-print the
    # result.  The commented-out block below was a batch runner that pulled
    # company names from MongoDB.
    # from pymongo import MongoClient
    # client = MongoClient('192.168.31.121')
    # CO = client['crawler_company_name']['companyName']
    # for i in CO.find({"province":"sax"}):
    #     print i['companyName']
    #     res = search2(i['companyName'])
    #     print type(res)
    #     print res
    print json.dumps(search(u'宝鸡惠民乳品（集团）有限公司'),ensure_ascii=False,indent=4)
    # print search2(u'西安爱特食品有限责任公司')
    #print json.dumps(search3("http://xygs.snaic.gov.cn/ztxy.do",{"maent.entbigtype": "11", "maent.pripid": "6100002018882", "djjg": "", "method": "qyInfo", "random": "1450946987608"}),ensure_ascii=False,indent=4)
#print verify('中国人民财产保险股份有限公司湘潭市雨湖支公司')